Visualization techniques for Convolutional Neural Networks¶
The goal of this lesson is to provide insights into deep neural networks: to understand what they 'see' and how they make decisions. We work with pre-trained, very efficient models and try to understand their behaviour.
Lots of interesting techniques are presented in Zeiler, Fergus: Visualizing and Understanding Convolutional Networks, 2013
We also need some sample images from the ImageNet dataset, for example we can download samples from here: https://github.com/ajschumacher/imagen
#import tensorflow as tf
#tf.config.set_visible_devices([], "GPU")
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import vgg16
from keras.layers import Add, Input, Conv2D, Conv2DTranspose, UpSampling2D, ZeroPadding2D, MaxPooling2D, Flatten, Dropout, Dense, Activation, GlobalAveragePooling2D, BatchNormalization
from keras.models import Sequential
from keras.utils import plot_model
from keras import Model
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Load the full VGG16 model with ImageNet weights (downloaded on first use).
vgg = vgg16.VGG16()
vgg.summary()
Model: "vgg16"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ input_layer (InputLayer) │ (None, 224, 224, 3) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 112, 112, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 56, 56, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 56, 56, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 28, 28, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 28, 28, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 28, 28, 512) │ 
2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 14, 14, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv2 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv3 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_pool (MaxPooling2D) │ (None, 7, 7, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ flatten (Flatten) │ (None, 25088) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ fc1 (Dense) │ (None, 4096) │ 102,764,544 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ fc2 (Dense) │ (None, 4096) │ 16,781,312 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ predictions (Dense) │ (None, 1000) │ 4,097,000 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 138,357,544 (527.79 MB)
Trainable params: 138,357,544 (527.79 MB)
Non-trainable params: 0 (0.00 B)
# Sample ImageNet image (a fox), resized to VGG16's expected 224x224 input.
filename = "imagen/n02118333_12193_fox.jpg"
img = load_img(filename, target_size=(224, 224, 3))
img  # display the PIL image inline
Preparing the images:
- load the image with load_img
- convert it to an array with img_to_array
- reshape it to the correct shape
- apply the corresponding VGG16 preprocess
from keras.applications.vgg16 import preprocess_input as preprocess_vgg
def prepare_image(filename, width=224, height=224):
    """Load an image and return a (1, width, height, 3) array ready for VGG16.

    Steps: load + resize, convert to numpy, add the batch dimension, then
    apply Keras' VGG16-specific preprocessing (channel reordering + mean
    subtraction). The original stub returned an undefined `img_array`,
    raising NameError when called.
    """
    img = load_img(filename, target_size=(width, height, 3))
    img_array = img_to_array(img)
    img_array = img_array.reshape(1, width, height, 3)
    img_array = preprocess_vgg(img_array)
    return img_array
What are the top predictions? What if we do a different preprocess?¶
# Reload the image as raw 0-255 float pixel values (no VGG preprocessing yet).
img = load_img(filename, target_size=(224, 224, 3))
img_array = img_to_array(img)
# Add the leading batch dimension: the model expects (batch, H, W, C).
img_array = img_array.reshape(1, 224, 224, 3)
img_array.shape, type(img_array), type(img)
((1, 224, 224, 3), numpy.ndarray, PIL.Image.Image)
# Predict on the raw (un-preprocessed) pixels; top class is 280 here.
pred = vgg.predict(img_array)
np.argmax(pred)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step
280
# Indices of the 10 highest-scoring classes (negate to sort descending).
np.argsort(-pred)[0, :10]
array([280, 278, 272, 330, 279, 270, 277, 335, 331, 104])
Use some preprocess¶
# Wrong preprocess: plain 0-1 scaling is NOT what VGG16 was trained with,
# and the top prediction changes completely (669 instead of a fox class).
pred = vgg.predict(img_array / 255)
np.argmax(pred)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 63ms/step
669
# Correct preprocess - VGG
# With the proper VGG16 preprocessing the top class becomes 278.
pred = vgg.predict(preprocess_vgg(img_array))
np.argmax(pred)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step
278
# Top-10 classes with correct preprocessing.
np.argsort(-pred)[0, :10]
array([278, 277, 280, 272, 274, 270, 276, 186, 279, 106])
def prepare_image(filename, width=224, height=224):
    """Read `filename`, resize it and apply the VGG16 preprocess.

    Returns a float array of shape (1, width, height, 3) suitable for
    `vgg.predict`.
    """
    loaded = load_img(filename, target_size=(width, height, 3))
    batch = img_to_array(loaded).reshape(1, width, height, 3)
    return preprocess_vgg(batch)
Create truncated models from the VGG layers¶
# Keep only the first conv layer to inspect the earliest feature maps.
truncated_vgg = Sequential(vgg.layers[:2])
truncated_vgg.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 1,792 (7.00 KB)
Trainable params: 1,792 (7.00 KB)
Non-trainable params: 0 (0.00 B)
Visualization of the feature maps¶
def plot_maps(preds, n=8):
    """Show the first n*n channels of `preds` (batch, H, W, C) as an n-by-n grid of grayscale images."""
    fig, axs = plt.subplots(n, n, figsize=(16, 16))
    # axs.flat iterates the grid row-major, matching the original counter.
    for channel, ax in enumerate(axs.flat):
        ax.set_xticks([])
        ax.set_yticks([])
        ax.imshow(preds[0, :, :, channel], cmap="gray")
    plt.show()
# Feature maps of the first conv layer for the fox image: (1, 224, 224, 64).
img_array = prepare_image(filename)
pred = truncated_vgg.predict(img_array)
pred.shape
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
(1, 224, 224, 64)
# Visualize the 64 first-layer feature maps as an 8x8 grid.
plot_maps(pred)
Layer 5¶
# Truncate after the first 5 layers (through block2_conv1) and plot its maps.
truncated_vgg = Sequential(vgg.layers[:5])
truncated_vgg.summary()
pred = truncated_vgg.predict(img_array)
plot_maps(pred)
Model: "sequential_3"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 112,576 (439.75 KB)
Trainable params: 112,576 (439.75 KB)
Non-trainable params: 0 (0.00 B)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
Layer 8¶
# Truncate after the first 8 layers (through block3_conv1) and plot its maps.
truncated_vgg = Sequential(vgg.layers[:8])
truncated_vgg.summary()
pred = truncated_vgg.predict(img_array)
plot_maps(pred)
Model: "sequential_4"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 112, 112, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 56, 56, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 56, 56, 256) │ 295,168 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 555,328 (2.12 MB)
Trainable params: 555,328 (2.12 MB)
Non-trainable params: 0 (0.00 B)
WARNING:tensorflow:5 out of the last 12 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x3865f14e0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step
Layer 16¶
# Truncate after the first 16 layers (through block5_conv1) and plot its maps.
truncated_vgg = Sequential(vgg.layers[:16])
truncated_vgg.summary()
pred = truncated_vgg.predict(img_array)
plot_maps(pred)
Model: "sequential_5"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 112, 112, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 56, 56, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 56, 56, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 28, 28, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 28, 28, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 14, 
14, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 9,995,072 (38.13 MB)
Trainable params: 9,995,072 (38.13 MB)
Non-trainable params: 0 (0.00 B)
WARNING:tensorflow:6 out of the last 13 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x387622f20> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 83ms/step
Layer 19¶
# Truncate after the first 19 layers (through block5_pool) and plot its maps.
truncated_vgg = Sequential(vgg.layers[:19])
truncated_vgg.summary()
pred = truncated_vgg.predict(img_array)
plot_maps(pred)
Model: "sequential_8"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 112, 112, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 56, 56, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 56, 56, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 28, 28, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 28, 28, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 14, 
14, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv2 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv3 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_pool (MaxPooling2D) │ (None, 7, 7, 512) │ 0 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 14,714,688 (56.13 MB)
Trainable params: 14,714,688 (56.13 MB)
Non-trainable params: 0 (0.00 B)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 94ms/step
Maximum activations¶
We can use a trained model to predict on a large dataset and check the individual neurons to find the images that trigger them, i.e. the images with the highest activation values. To do this we truncate the final layer and analyze the outputs of the 4096-neuron dense layer.
- we read the files
- use the model for predictions
# Render the VGG16 architecture diagram with per-layer output shapes.
plot_model(vgg, show_shapes=True)
from tqdm import tqdm
import glob
# Collect all sample images; sorted so indices are reproducible across runs.
files = sorted(glob.glob("imagen/*.jpg"))
preds = []
files[:15]
['imagen/n00007846_147031_person.jpg', 'imagen/n00007846_149204_person.jpg', 'imagen/n00007846_152343_person.jpg', 'imagen/n00007846_160891_person.jpg', 'imagen/n00007846_98724_person.jpg', 'imagen/n01443537_11099_goldfish.jpg', 'imagen/n01443537_2625_goldfish.jpg', 'imagen/n01443537_2675_goldfish.jpg', 'imagen/n01443537_4691_goldfish.jpg', 'imagen/n01443537_5048_goldfish.jpg', 'imagen/n01495701_1216_ray.jpg', 'imagen/n01495701_1287_ray.jpg', 'imagen/n01495701_2358_ray.jpg', 'imagen/n01495701_3614_ray.jpg', 'imagen/n01495701_4155_ray.jpg']
def read_file(filename, width=224, height=224):
    """Load `filename` at (width, height) and scale pixels to [0, 1].

    Returns an array of shape (1, width, height, 3). Note: this is simple
    display-oriented scaling, not the VGG16 training preprocess.
    """
    pixels = img_to_array(load_img(filename, target_size=(width, height, 3)))
    return pixels.reshape(1, width, height, 3) / 255
def plot_max_activations(idx, N=8):
    """Show the N images whose penultimate-layer activation is highest for neuron `idx`.

    Relies on the notebook-level `preds` (n_images x 4096 activation matrix),
    `files` (matching filename list) and the `read_file` helper. The original
    stub had an empty body; this implements the logic that is otherwise
    duplicated inline for neurons 0, 2222 and 234 below.
    """
    fig, ax = plt.subplots(1, N, figsize=(2 * N, 6))
    for col, file_index in enumerate(np.argsort(-preds[:, idx])[:N]):
        filename = files[file_index]
        ax[col].imshow(read_file(filename).reshape(224, 224, 3))
        ax[col].set_title(filename.split("_")[-1])
    plt.show()
    return
# Drop the final 1000-way softmax: the output becomes the 4096-unit fc2 layer.
vgg_4096 = Sequential(vgg.layers[:-1])
vgg_4096.summary()
Model: "sequential_9"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ block1_conv1 (Conv2D) │ (None, 224, 224, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 224, 224, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 112, 112, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 112, 112, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 112, 112, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 56, 56, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 56, 56, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 56, 56, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 28, 28, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 28, 28, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 28, 28, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 14, 
14, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv2 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv3 (Conv2D) │ (None, 14, 14, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_pool (MaxPooling2D) │ (None, 7, 7, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ flatten (Flatten) │ (None, 25088) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ fc1 (Dense) │ (None, 4096) │ 102,764,544 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ fc2 (Dense) │ (None, 4096) │ 16,781,312 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 134,260,544 (512.16 MB)
Trainable params: 134,260,544 (512.16 MB)
Non-trainable params: 0 (0.00 B)
# Compute fc2 activations image-by-image (slow: one predict call per file).
preds = []
for filename in tqdm(files):
    img_array = prepare_image(filename)
    pred = vgg_4096.predict(img_array, verbose=0)
    preds.append(pred)
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:06<00:00, 15.03it/s]
# Stack the per-image predictions into a single (n_images, 4096) matrix.
preds = np.array(preds).reshape(-1, 4096)
preds.shape
(1000, 4096)
# Alternative: stack all preprocessed images and predict in one batched call
# (much faster; yields the same activations up to float rounding).
img_arrays = []
for filename in tqdm(files):
    img_array = prepare_image(filename)
    img_arrays.append(img_array[0])
img_arrays = np.array(img_arrays)
print(img_arrays.shape)
preds2 = vgg_4096.predict(img_arrays)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 639.28it/s]
(1000, 224, 224, 3) 32/32 ━━━━━━━━━━━━━━━━━━━━ 32s 1s/step
preds
array([[0. , 2.6333168 , 1.627181 , ..., 4.1839786 , 0. ,
0. ],
[0. , 0.99170005, 0.53591937, ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
...,
[1.1621014 , 0.3371096 , 0. , ..., 0. , 0. ,
0.62498105],
[1.1120191 , 1.9480659 , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 1.1000795 ,
0. ]], dtype=float32)
preds2
array([[0. , 2.633317 , 1.6271806 , ..., 4.183977 , 0. ,
0. ],
[0. , 0.9916997 , 0.5359192 , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
...,
[1.1621019 , 0.33710974, 0. , ..., 0. , 0. ,
0.62498176],
[1.1120186 , 1.9480655 , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 1.1000799 ,
0. ]], dtype=float32)
preds.shape
(1000, 4096)
# Images with the highest activation of fc2 neuron 0.
neuron_idx = 0
fig, ax = plt.subplots(1, 8, figsize=(16, 6))
for col, file_index in enumerate(np.argsort(-preds[:, neuron_idx])[:8]):
    filename = files[file_index]
    ax[col].imshow(read_file(filename).reshape(224, 224, 3))
    ax[col].set_title(filename.split("_")[-1])
# Images with the highest activation of fc2 neuron 2222.
neuron_idx = 2222
fig, ax = plt.subplots(1, 8, figsize=(16, 6))
for col, file_index in enumerate(np.argsort(-preds[:, neuron_idx])[:8]):
    filename = files[file_index]
    ax[col].imshow(read_file(filename).reshape(224, 224, 3))
    ax[col].set_title(filename.split("_")[-1])
# Images with the highest activation of fc2 neuron 234.
neuron_idx = 234
fig, ax = plt.subplots(1, 8, figsize=(16, 6))
for col, file_index in enumerate(np.argsort(-preds[:, neuron_idx])[:8]):
    filename = files[file_index]
    ax[col].imshow(read_file(filename).reshape(224, 224, 3))
    ax[col].set_title(filename.split("_")[-1])
Further interesting experiments in the literature:
TSNE visualization on the penultimate layer¶
from sklearn.manifold import TSNE
# Embed the 4096-dim fc2 activations into 2-D for visualization.
tsne = TSNE(n_components=2, random_state=42)
P = tsne.fit_transform(preds)
preds.shape, P.shape
((1000, 4096), (1000, 2))
# Scatter the full embedding, then highlight selected classes by filename suffix.
label = "fox"
special_indices = [idx for idx, file in enumerate(files) if \
                   file.endswith(label + ".jpg")]
plt.scatter(P[:, 0], P[:, 1], s=1)
plt.scatter(P[special_indices, 0], P[special_indices, 1], s=2, c="red")
label = "squirrel"
special_indices = [idx for idx, file in enumerate(files) if \
                   file.endswith(label + ".jpg")]
plt.scatter(P[special_indices, 0], P[special_indices, 1], s=4, c="orange")
label = "lion"
special_indices = [idx for idx, file in enumerate(files) if \
                   file.endswith(label + ".jpg")]
plt.scatter(P[special_indices, 0], P[special_indices, 1], s=4, c="green")
<matplotlib.collections.PathCollection at 0x1675ce210>
Scatter plot of the images¶
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
if 1:
    # Draw every image as a 64x64 thumbnail at its t-SNE coordinates.
    def getImage(filename):
        return OffsetImage(load_img(filename, target_size=(64, 64, 3)))
    x = P[:, 0]
    y = P[:, 1]
    fig, ax = plt.subplots(figsize=(32, 32))
    ax.scatter(x, y)
    for x0, y0, file in zip(x, y, files):
        ab = AnnotationBbox(getImage(file), (x0, y0), frameon=False)
        ax.add_artist(ab)
    ax.set_xticks([])
    ax.set_yticks([])
if 1:
    # Same thumbnail scatter, but semi-transparent so overlaps stay readable.
    def getImage(filename):
        img = load_img(filename, target_size=(64, 64, 3))
        return OffsetImage(img, alpha=0.5)
    x = P[:, 0]
    y = P[:, 1]
    fig, ax = plt.subplots(figsize=(32, 32))
    ax.scatter(x, y, alpha=0.5)
    for x0, y0, file in zip(x, y, files):
        ab = AnnotationBbox(getImage(file), (x0, y0), frameon=False)
        ax.add_artist(ab)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()
Interactive Bokeh visualization¶
import pandas as pd
import bokeh
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.models import HoverTool, ZoomInTool, ZoomOutTool
from io import BytesIO
import base64
from bokeh.io import output_notebook , output_file
if 1:
    # Route Bokeh output into the notebook (instead of an HTML file).
    output_notebook()
labels = [file.split("_")[-1].split(".")[0] for file in files]
source = ColumnDataSource(
data=dict(
x = P[:, 0],
y = P[:, 1],
desc = labels,
imgs = files
)
)
hover = HoverTool(
tooltips="""
<div>
<div>
<img
src="@imgs" height="128" alt="@imgs" width="128"
style="float: left; margin: 0px 15px 15px 0px;"
border="2"
></img>
</div>
<div>
<span style="font-size: 17px; font-weight: bold;">@desc</span>
<span style="font-size: 15px; color: #966;">[$index]</span>
</div>
<div>
<span style="font-size: 15px;">Location</span>
<span style="font-size: 10px; color: #696;">($x, $y)</span>
</div>
</div>
"""
)
p = figure(width=800, height=800, tools=[hover, "pan, wheel_zoom, box_zoom, reset"],
title="Mouse over the dots")
p.scatter('x', 'y', size=10, source=source)
show(p)
#output_file("interactive_tsne.html")
Heatmap visualizations by occlusions¶
We can analyze the model's localization effects if we mask out certain parts of the image and look at the predictions. This way we can create a heatmap showing the effects of the pixels. We are going to need the ImageNet label list
from keras.applications.inception_resnet_v2 import InceptionResNetV2
import ast
# Map class index -> human-readable ImageNet label string.
label_dict = ast.literal_eval(open("imagenet_labels.txt", "r").read())
# Inception-ResNet-v2 takes 299x299 inputs (vs 224x224 for VGG16).
inception = InceptionResNetV2(input_shape=(299, 299, 3))
file = "imagen/n02118333_12193_fox.jpg"
target_w, target_h = 299, 299
img = read_file(file, target_w, target_h)
plt.imshow(img[0])
<matplotlib.image.AxesImage at 0x3c74f7b90>
What are top predictions for the model?¶
# Prepare the fox image for Inception (299x299, pixels scaled here to [0, 1])
# and look up the label of the top predicted class.
filename = "imagen/n02118333_12193_fox.jpg"
img = load_img(filename, target_size=(299, 299, 3))
img_array = img_to_array(img)
img_array = img_array.reshape(1, 299, 299, 3) / 255
pred = inception.predict(img_array)
label_dict[np.argmax(pred)]
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 172ms/step
'red fox, Vulpes vulpes'
# Print the five highest-probability classes with their labels.
for class_idx in np.argsort(-pred)[0, :5]:
    print(class_idx, pred[0, class_idx], label_dict[class_idx])
277 0.62009114 red fox, Vulpes vulpes 278 0.29941013 kit fox, Vulpes macrotis 280 0.014416512 grey fox, gray fox, Urocyon cinereoargenteus 274 0.0022625863 dhole, Cuon alpinus 272 0.0014977839 coyote, prairie wolf, brush wolf, Canis latrans
What if we occlude a small portion of the image?¶
from copy import deepcopy
# Zero out a 64x64 square of the image starting at pixel (120, 120).
start_w, start_h = 120, 120
window_w, window_h = 64, 64
img2 = deepcopy(img_array[0])
img2[start_h: start_h + window_h, start_w: start_w + window_w, :] = 0
plt.imshow(img2)
<matplotlib.image.AxesImage at 0x3c93dfa50>
# Re-classify the occluded image and inspect how the top-5 ranking changes.
pred = inception.predict(img2.reshape(1, 299, 299, 3))
for class_idx in np.argsort(-pred)[0, :5]:
    print(class_idx, pred[0, class_idx], label_dict[class_idx])
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 164ms/step 277 0.37961304 red fox, Vulpes vulpes 278 0.2597882 kit fox, Vulpes macrotis 274 0.10343993 dhole, Cuon alpinus 356 0.05310984 weasel 298 0.043077104 mongoose
Let's move the occlusion window and see the top prediction of the model¶
# Slide the occlusion window across the image with stride 10, collecting one
# occluded copy of the image per window position.
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
images = []
for start_w in start_w_vec:
    for start_h in start_h_vec:
        img2 = deepcopy(img_array[0])
        img2[start_h: start_h + window_h, start_w: start_w + window_w, :] = 0
        images.append(img2)
images = np.array(images)
images.shape
(576, 299, 299, 3)
# Classify all occluded images in one batched call.
preds = inception.predict(images)
18/18 ━━━━━━━━━━━━━━━━━━━━ 21s 1s/step
# Top predicted class index for each occluded image.
class_preds = np.argmax(preds, axis=1)
Let's count the top predictions¶
from collections import Counter
# Frequency of each top class across all occlusion positions.
Counter(class_preds)
Counter({277: 552, 278: 18, 298: 3, 299: 1, 356: 1, 336: 1})
# Resolve the counted class indices to label names.
for c in Counter(class_preds):
    print(c, label_dict[c])
277 red fox, Vulpes vulpes 278 kit fox, Vulpes macrotis 299 meerkat, mierkat 298 mongoose 356 weasel 336 marmot
Create a plot to visualize how the real class prediction changes with the moving window¶
# The window positions form a 24x24 grid; show the true-class probability at
# each occlusion position (low values = occluding there hurts the prediction).
special_index = 277
plt.imshow(preds[:, special_index].reshape(24, 24))
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x3838fc0d0>
Refine the localized predictions by taking the overlapping averages¶
# For every pixel, accumulate the sum of true-class probabilities over all
# occlusion windows covering it, plus how many windows covered it; the
# per-pixel average refines the coarse position heatmap above.
probs = np.zeros((target_w, target_h))
counts = np.zeros((target_w, target_h))
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
cnt = 0
for start_w in start_w_vec:
    for start_h in start_h_vec:
        counts[start_h: start_h + window_h, start_w: start_w + window_w] += 1
        probs[start_h: start_h + window_h, start_w: start_w + window_w] += preds[cnt, special_index]
        cnt += 1
# Coverage map: border pixels are reached by fewer (or zero) windows.
plt.imshow(counts)
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x1675d40d0>
# Overlay the averaged occlusion-sensitivity heatmap on the original image.
plt.imshow(img_array[0])
# Border pixels are covered by no window (counts == 0), so a plain division
# produced NaNs and a RuntimeWarning; guard it and leave those pixels at 0.
heat = np.divide(probs, counts, out=np.zeros_like(probs), where=counts > 0)
plt.imshow(heat, alpha=0.4, cmap="jet")
plt.colorbar()
/var/folders/sq/_vdvf2nn51nbbtm87hrx368h0000gn/T/ipykernel_10170/994929873.py:2: RuntimeWarning: invalid value encountered in divide plt.imshow(probs / counts, alpha=0.4, cmap="jet")
<matplotlib.colorbar.Colorbar at 0x3caf009d0>
Example #2¶
# Example 2 (koala): load and show the image, then repeat the moving-window
# occlusion and batched classification.
filename = "imagen/n01882714_11334_koala_bear.jpg"
img = read_file(filename, target_w, target_h)
plt.imshow(img[0])
img = load_img(filename, target_size=(299, 299, 3))
img_array = img_to_array(img)
img_array = img_array.reshape(1, 299, 299, 3) / 255
################################### MOVING WINDOW ###############################
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
images = []
for start_w in start_w_vec:
    for start_h in start_h_vec:
        img2 = deepcopy(img_array[0])
        img2[start_h: start_h + window_h, start_w: start_w + window_w, :] = 0
        images.append(img2)
images = np.array(images)
print(images.shape)
################################### PREDICTION ###################
preds = inception.predict(images)
class_preds = np.argmax(preds, axis=1)
print(Counter(class_preds))
for c in Counter(class_preds):
    print(c, label_dict[c])
(576, 299, 299, 3) 18/18 ━━━━━━━━━━━━━━━━━━━━ 21s 1s/step Counter({105: 453, 448: 120, 920: 2, 863: 1}) 105 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus 448 birdhouse 920 traffic light, traffic signal, stoplight 863 totem pole
# Accumulate per-pixel probability sums and window coverage for class 105.
special_index = 105
probs = np.zeros((target_w, target_h))
counts = np.zeros((target_w, target_h))
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
cnt = 0
for start_w in start_w_vec:
    for start_h in start_h_vec:
        counts[start_h: start_h + window_h, start_w: start_w + window_w] += 1
        probs[start_h: start_h + window_h, start_w: start_w + window_w] += preds[cnt, special_index]
        cnt += 1
# Overlay the averaged occlusion-sensitivity heatmap on the koala image.
plt.imshow(img_array[0])
# Guard the division: uncovered border pixels (counts == 0) previously
# produced NaNs and a RuntimeWarning; they now stay 0 instead.
heat = np.divide(probs, counts, out=np.zeros_like(probs), where=counts > 0)
plt.imshow(heat, alpha=0.4, cmap="jet")
plt.colorbar()
/var/folders/sq/_vdvf2nn51nbbtm87hrx368h0000gn/T/ipykernel_10170/2946375911.py:24: RuntimeWarning: invalid value encountered in divide plt.imshow(probs / counts, alpha=0.4, cmap="jet")
<matplotlib.colorbar.Colorbar at 0x3cb0fe150>
Example #3¶
# Example 3 (hamburger): load and show the image, then repeat the
# moving-window occlusion and batched classification.
filename = "imagen/n07697100_9282_hamburger.jpg"
img = read_file(filename, target_w, target_h)
plt.imshow(img[0])
img = load_img(filename, target_size=(299, 299, 3))
img_array = img_to_array(img)
img_array = img_array.reshape(1, 299, 299, 3) / 255
################################### MOVING WINDOW ###############################
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
images = []
for start_w in start_w_vec:
    for start_h in start_h_vec:
        img2 = deepcopy(img_array[0])
        img2[start_h: start_h + window_h, start_w: start_w + window_w, :] = 0
        images.append(img2)
images = np.array(images)
print(images.shape)
################################### PREDICTION ###################
preds = inception.predict(images)
class_preds = np.argmax(preds, axis=1)
print(Counter(class_preds))
for c in Counter(class_preds):
    print(c, label_dict[c])
(576, 299, 299, 3) 18/18 ━━━━━━━━━━━━━━━━━━━━ 21s 1s/step Counter({933: 507, 917: 69}) 933 cheeseburger 917 comic book
# Accumulate per-pixel probability sums and window coverage for class 933.
special_index = 933
probs = np.zeros((target_w, target_h))
counts = np.zeros((target_w, target_h))
step = 10 # stride = 10
start_w_vec = np.arange(0, target_w - window_w, step)
start_h_vec = np.arange(0, target_h - window_h, step)
cnt = 0
for start_w in start_w_vec:
    for start_h in start_h_vec:
        counts[start_h: start_h + window_h, start_w: start_w + window_w] += 1
        probs[start_h: start_h + window_h, start_w: start_w + window_w] += preds[cnt, special_index]
        cnt += 1
# Overlay the averaged occlusion-sensitivity heatmap on the hamburger image.
plt.imshow(img_array[0])
# Guard the division: uncovered border pixels (counts == 0) previously
# produced NaNs and a RuntimeWarning; they now stay 0 instead.
heat = np.divide(probs, counts, out=np.zeros_like(probs), where=counts > 0)
plt.imshow(heat, alpha=0.4, cmap="jet")
plt.colorbar()
/var/folders/sq/_vdvf2nn51nbbtm87hrx368h0000gn/T/ipykernel_10170/1109267743.py:24: RuntimeWarning: invalid value encountered in divide plt.imshow(probs / counts, alpha=0.4, cmap="jet")
<matplotlib.colorbar.Colorbar at 0x3cb294950>